LICENSE
README.md
setup.py
bigcode_eval/__init__.py
bigcode_eval/arguments.py
bigcode_eval/base.py
bigcode_eval/evaluator.py
bigcode_eval/generation.py
bigcode_eval/utils.py
bigcode_eval.egg-info/PKG-INFO
bigcode_eval.egg-info/SOURCES.txt
bigcode_eval.egg-info/dependency_links.txt
bigcode_eval.egg-info/requires.txt
bigcode_eval.egg-info/top_level.txt
bigcode_eval/tasks/__init__.py
bigcode_eval/tasks/apps.py
bigcode_eval/tasks/codexglue_code_to_text.py
bigcode_eval/tasks/codexglue_text_to_text.py
bigcode_eval/tasks/conala.py
bigcode_eval/tasks/concode.py
bigcode_eval/tasks/ds1000.py
bigcode_eval/tasks/gsm.py
bigcode_eval/tasks/humaneval.py
bigcode_eval/tasks/humanevalpack.py
bigcode_eval/tasks/humanevalpack_openai.py
bigcode_eval/tasks/humanevalplus.py
bigcode_eval/tasks/instruct_humaneval.py
bigcode_eval/tasks/instruct_wizard_humaneval.py
bigcode_eval/tasks/mbpp.py
bigcode_eval/tasks/mbppplus.py
bigcode_eval/tasks/mercury.py
bigcode_eval/tasks/multiple.py
bigcode_eval/tasks/parity.py
bigcode_eval/tasks/python_bugs.py
bigcode_eval/tasks/quixbugs.py
bigcode_eval/tasks/recode.py
bigcode_eval/tasks/santacoder_fim.py
bigcode_eval/tasks/studenteval.py
bigcode_eval/tasks/custom_metrics/__init__.py
bigcode_eval/tasks/custom_metrics/beyond_eval.py
bigcode_eval/tasks/custom_metrics/code_eval.py
bigcode_eval/tasks/custom_metrics/codexglue_code_to_text_bleu.py
bigcode_eval/tasks/custom_metrics/diff_eval.py
bigcode_eval/tasks/custom_metrics/execute.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/__init__.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/containerized_eval.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_cpp.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_cs.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_dlang.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_go.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_java.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_javascript.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_julia.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_lua.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_php.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_pl.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_python.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_r.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_racket.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_ruby.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_rust.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_scala.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_sh.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_swift.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/eval_ts.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/evaluation.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/generic_eval.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/libeval.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/single_experiment_pass_k.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/safe_subprocess/__init__.py
bigcode_eval/tasks/custom_metrics/multiple_metrics/safe_subprocess/module_test.py
bigcode_eval/tasks/custom_metrics/pal_metric/__init__.py
bigcode_eval/tasks/custom_metrics/pal_metric/pal_code_exec.py
bigcode_eval/tasks/custom_metrics/pal_metric/python_executor.py
tests/test_generation_evaluation.py
tests/test_prompts.py